--- title: 'Forecasting Seven Days Average Number of COVID19 Cases' date: 2021-08-04 permalink: posts/2021/08/04/predicting_nsw_covid_cases categories: - covid19 - python - machine learning - bayes modelling tags: - python - machine learning - bayes modelling layout: notebook ---
%load_ext autoreload
%autoreload 2
import sys
from datetime import timedelta, datetime
from pathlib import Path
import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
WARNING (theano.tensor.blas): Using NumPy C-API based implementation for BLAS functions.
Setup figure plot settings
%config InlineBackend.figure_format = 'retina'
az.style.use("arviz-darkgrid")
sns.set_palette("deep")
Setup paths to import scripts
PROJECT_ROOT = Path.cwd().parent.resolve()
sys.path.append(str(PROJECT_ROOT))
from src.data.make_dataset import make_cases_training_data, make_train_test_split, get_prior_distr_params
data = make_cases_training_data()
data.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 55 entries, 0 to 54 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 notification_date 55 non-null datetime64[ns] 1 Daily Number of Cases 55 non-null int64 2 Cumsum 55 non-null int64 3 Daily Difference 54 non-null float64 4 Growth Factor 53 non-null float64 5 Weekly Rolling Average 49 non-null float64 6 Pct Change 48 non-null float64 7 Weekly Average CumSum 49 non-null float64 8 Epidemiological Days 55 non-null float64 dtypes: datetime64[ns](1), float64(6), int64(2) memory usage: 6.4 KB
data
| notification_date | Daily Number of Cases | Cumsum | Daily Difference | Growth Factor | Weekly Rolling Average | Pct Change | Weekly Average CumSum | Epidemiological Days | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 2021-06-16 | 3 | 3 | NaN | NaN | NaN | NaN | NaN | -24.0 |
| 1 | 2021-06-17 | 1 | 4 | -2.0 | NaN | NaN | NaN | NaN | -23.0 |
| 2 | 2021-06-18 | 2 | 6 | 1.0 | -0.500000 | NaN | NaN | NaN | -22.0 |
| 3 | 2021-06-19 | 1 | 7 | -1.0 | -1.000000 | NaN | NaN | NaN | -21.0 |
| 4 | 2021-06-20 | 2 | 9 | 1.0 | -1.000000 | NaN | NaN | NaN | -20.0 |
| 5 | 2021-06-21 | 5 | 14 | 3.0 | 3.000000 | NaN | NaN | NaN | -19.0 |
| 6 | 2021-06-22 | 17 | 31 | 12.0 | 4.000000 | 4.0 | NaN | 4.0 | -18.0 |
| 7 | 2021-06-23 | 12 | 43 | -5.0 | -0.416667 | 6.0 | 0.500000 | 10.0 | -17.0 |
| 8 | 2021-06-24 | 21 | 64 | 9.0 | -1.800000 | 9.0 | 0.500000 | 19.0 | -16.0 |
| 9 | 2021-06-25 | 28 | 92 | 7.0 | 0.777778 | 12.0 | 0.333333 | 31.0 | -15.0 |
| 10 | 2021-06-26 | 25 | 117 | -3.0 | -0.428571 | 16.0 | 0.333333 | 47.0 | -14.0 |
| 11 | 2021-06-27 | 21 | 138 | -4.0 | 1.333333 | 18.0 | 0.125000 | 65.0 | -13.0 |
| 12 | 2021-06-28 | 14 | 152 | -7.0 | 1.750000 | 20.0 | 0.111111 | 85.0 | -12.0 |
| 13 | 2021-06-29 | 23 | 175 | 9.0 | -1.285714 | 21.0 | 0.050000 | 106.0 | -11.0 |
| 14 | 2021-06-30 | 30 | 205 | 7.0 | 0.777778 | 23.0 | 0.095238 | 129.0 | -10.0 |
| 15 | 2021-07-01 | 27 | 232 | -3.0 | -0.428571 | 24.0 | 0.043478 | 153.0 | -9.0 |
| 16 | 2021-07-02 | 34 | 266 | 7.0 | -2.333333 | 25.0 | 0.041667 | 178.0 | -8.0 |
| 17 | 2021-07-03 | 22 | 288 | -12.0 | -1.714286 | 24.0 | -0.040000 | 202.0 | -7.0 |
| 18 | 2021-07-04 | 31 | 319 | 9.0 | -0.750000 | 26.0 | 0.083333 | 228.0 | -6.0 |
| 19 | 2021-07-05 | 16 | 335 | -15.0 | -1.666667 | 26.0 | 0.000000 | 254.0 | -5.0 |
| 20 | 2021-07-06 | 33 | 368 | 17.0 | -1.133333 | 28.0 | 0.076923 | 282.0 | -4.0 |
| 21 | 2021-07-07 | 36 | 404 | 3.0 | 0.176471 | 28.0 | 0.000000 | 310.0 | -3.0 |
| 22 | 2021-07-08 | 45 | 449 | 9.0 | 3.000000 | 31.0 | 0.107143 | 341.0 | -2.0 |
| 23 | 2021-07-09 | 49 | 498 | 4.0 | 0.444444 | 33.0 | 0.064516 | 374.0 | -1.0 |
| 24 | 2021-07-10 | 103 | 601 | 54.0 | 13.500000 | 45.0 | 0.363636 | 419.0 | 0.0 |
| 25 | 2021-07-11 | 114 | 715 | 11.0 | 0.203704 | 57.0 | 0.266667 | 476.0 | 1.0 |
| 26 | 2021-07-12 | 72 | 787 | -42.0 | -3.818182 | 65.0 | 0.140351 | 541.0 | 2.0 |
| 27 | 2021-07-13 | 88 | 875 | 16.0 | -0.380952 | 72.0 | 0.107692 | 613.0 | 3.0 |
| 28 | 2021-07-14 | 75 | 950 | -13.0 | -0.812500 | 78.0 | 0.083333 | 691.0 | 4.0 |
| 29 | 2021-07-15 | 100 | 1050 | 25.0 | -1.923077 | 86.0 | 0.102564 | 777.0 | 5.0 |
| 30 | 2021-07-16 | 108 | 1158 | 8.0 | 0.320000 | 94.0 | 0.093023 | 871.0 | 6.0 |
| 31 | 2021-07-17 | 95 | 1253 | -13.0 | -1.625000 | 93.0 | -0.010638 | 964.0 | 7.0 |
| 32 | 2021-07-18 | 111 | 1364 | 16.0 | -1.230769 | 93.0 | 0.000000 | 1057.0 | 8.0 |
| 33 | 2021-07-19 | 65 | 1429 | -46.0 | -2.875000 | 92.0 | -0.010753 | 1149.0 | 9.0 |
| 34 | 2021-07-20 | 106 | 1535 | 41.0 | -0.891304 | 94.0 | 0.021739 | 1243.0 | 10.0 |
| 35 | 2021-07-21 | 142 | 1677 | 36.0 | 0.878049 | 104.0 | 0.106383 | 1347.0 | 11.0 |
| 36 | 2021-07-22 | 141 | 1818 | -1.0 | -0.027778 | 110.0 | 0.057692 | 1457.0 | 12.0 |
| 37 | 2021-07-23 | 151 | 1969 | 10.0 | -10.000000 | 116.0 | 0.054545 | 1573.0 | 13.0 |
| 38 | 2021-07-24 | 142 | 2111 | -9.0 | -0.900000 | 123.0 | 0.060345 | 1696.0 | 14.0 |
| 39 | 2021-07-25 | 168 | 2279 | 26.0 | -2.888889 | 131.0 | 0.065041 | 1827.0 | 15.0 |
| 40 | 2021-07-26 | 140 | 2419 | -28.0 | -1.076923 | 141.0 | 0.076336 | 1968.0 | 16.0 |
| 41 | 2021-07-27 | 162 | 2581 | 22.0 | -0.785714 | 149.0 | 0.056738 | 2117.0 | 17.0 |
| 42 | 2021-07-28 | 257 | 2838 | 95.0 | 4.318182 | 166.0 | 0.114094 | 2283.0 | 18.0 |
| 43 | 2021-07-29 | 181 | 3019 | -76.0 | -0.800000 | 172.0 | 0.036145 | 2455.0 | 19.0 |
| 44 | 2021-07-30 | 198 | 3217 | 17.0 | -0.223684 | 178.0 | 0.034884 | 2633.0 | 20.0 |
| 45 | 2021-07-31 | 244 | 3461 | 46.0 | 2.705882 | 193.0 | 0.084270 | 2826.0 | 21.0 |
| 46 | 2021-08-01 | 192 | 3653 | -52.0 | -1.130435 | 196.0 | 0.015544 | 3022.0 | 22.0 |
| 47 | 2021-08-02 | 208 | 3861 | 16.0 | -0.307692 | 206.0 | 0.051020 | 3228.0 | 23.0 |
| 48 | 2021-08-03 | 239 | 4100 | 31.0 | 1.937500 | 217.0 | 0.053398 | 3445.0 | 24.0 |
| 49 | 2021-08-04 | 244 | 4344 | 5.0 | 0.161290 | 215.0 | -0.009217 | 3660.0 | 25.0 |
| 50 | 2021-08-05 | 329 | 4673 | 85.0 | 17.000000 | 236.0 | 0.097674 | 3896.0 | 26.0 |
| 51 | 2021-08-06 | 301 | 4974 | -28.0 | -0.329412 | 251.0 | 0.063559 | 4147.0 | 27.0 |
| 52 | 2021-08-07 | 263 | 5237 | -38.0 | 1.357143 | 254.0 | 0.011952 | 4401.0 | 28.0 |
| 53 | 2021-08-08 | 265 | 5502 | 2.0 | -0.052632 | 264.0 | 0.039370 | 4665.0 | 29.0 |
| 54 | 2021-08-09 | 296 | 5798 | 31.0 | 15.500000 | 277.0 | 0.049242 | 4942.0 | 30.0 |
train_size = 0.8
X_train, X_test, y_train, y_test = make_train_test_split(data, train_size=train_size)
Get prior distribution parameters
initial_number_of_cases, daily_number_of_cases_std, average_pct_change, std_pct_change = get_prior_distr_params(y_train)
print(f"Number of cases at the start of the time series: {initial_number_of_cases}")
print(f"Daily number of cases standard deviation: {daily_number_of_cases_std}")
print(f"Average percentage change: {average_pct_change}")
print(f"Standard deviation of percentage change: {std_pct_change}")
Number of cases at the start of the time series: 57.0 Daily number of cases standard deviation: 61.225618730143424 Average percentage change: 0.2782721759659427 Standard deviation of percentage change: 0.8433038407229124
# Create PyMC3 context manager
with pm.Model() as model:
# Setup data for the model
t = pm.Data("X", X_train)
cases = pm.Data("y", y_train)
# Intercept
a = pm.Normal("a", mu=initial_number_of_cases, sigma=daily_number_of_cases_std)
# Slope
b = pm.Normal("b", mu=average_pct_change, sigma=std_pct_change)
# Exponential regression
growth = a * (1 + b) ** t
# Likelihood error
eps = pm.HalfNormal("eps")
# Likelihood - Counts here, so poission or negative binomial. Causes issues. Lognormal tends to work better?
pm.Lognormal("cases", mu=np.log(growth), sigma=eps, observed=cases)
trace = pm.sample(1000, tune=1000, cores=5, return_inferencedata=True)
post_Pred = pm.sample_posterior_predictive(trace)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (5 chains in 5 jobs) NUTS: [eps, b, a]
Sampling 5 chains for 1_000 tune and 1_000 draw iterations (5_000 + 5_000 draws total) took 15 seconds.
# Update data reference.
pm.set_data({"X": X_test}, model=model)
# Generate posterior samples
post_pred_test = pm.sample_posterior_predictive(trace, model=model, samples=1000)
/home/vscode/.local/lib/python3.9/site-packages/pymc3/sampling.py:1689: UserWarning: samples parameter is smaller than nchains times ndraws, some draws and/or chains may not be represented in the returned posterior predictive sample warnings.warn(
Get test prediction datam
y_pred = np.mean(post_pred_test["cases"], axis=0)
y_std = np.std(post_pred_test["cases"], axis=0)
Define custom metrics for model performance evaluation
def point_set_distance(y_pred: np.ndarray, y_pred_std: np.ndarray, y_true: np.ndarray
) -> np.ndarray:
"""Compute the point-to-set distance between the predicted and true values.
Args:
y_pred (np.ndarray): Numpy array. Predicted value.
y_true (np.ndarray): Numpy array. True value.
y_pred_std (np.ndarray, optional): Standard deviation of the predicted value.
Returns:
np.ndarray: point-to-set distance: Numpy array.
References:
[1] https://math.stackexchange.com/questions/2099527/distance-between-a-point-and-a-set-or-closure-of-it
"""
assert y_pred.shape == y_true.shape, "The shapes of the predicted and true values do not match."
assert y_pred.shape == y_pred_std.shape, "The shapes of predicted and std values do not match."
return np.where(y_pred - y_pred_std <= y_true <= y_pred + y_pred_std, 0
,min(abs(y_true - (y_pred + y_pred_std))
,abs(y_true - (y_pred - y_pred_std))
)
)
def mean_squared_relative_error(y_pred: np.ndarray, y_true: np.ndarray
,distance: str=None, y_pred_std: np.ndarray=None
) -> float:
"""Compute the mean squared relative error between the predicted and true values.
Args:
y_pred (np.ndarray): Numpy array. Predicted value.
y_true (np.ndarray): Numpy array. True value.
distance (str, optional): Distance metric to use. Defaults to None.
y_pred_std (np.ndarray, optional): Standard deviation of the predicted value.
Returns:
float: mean squared relative error: Numpy array. Mean squared relative error between the predicted and true values.
References:
[1] https://stats.stackexchange.com/questions/413209/is-there-something-like-a-root-mean-square-relative-error-rmsre-or-what-is-t
"""
if distance == "point to set":
d = point_set_distance(y_pred, y_pred_std, y_true)
else:
d = y_pred - y_true
return (d**2/y_true**2).mean()
def relative_mean_squared_error(y_pred: np.ndarray, y_true: np.ndarray
,distance: str=None, y_pred_std: np.ndarray=None
) -> float:
"""Compute reative mean squared error between the predicted and true values.
Args:
y_pred (np.ndarray): Numpy array. Predicted value.
y_true (np.ndarray): Numpy array. True value.
distance (str, optional): Distance metric to use. Defaults to None.
y_pred_std (np.ndarray, optional): Standard deviation of the predicted value.
Returns:
float: Relative mean squared error between the predicted and true values.
References:
[1] https://stats.stackexchange.com/questions/413209/is-there-something-like-a-root-mean-square-relative-error-rmsre-or-what-is-t
"""
if distance == "point to set":
d = point_set_distance(y_pred, y_pred_std, y_true)
else:
d = y_pred - y_true
return (d**2).mean()/(y_true**2).sum()
Calculate performance metrics
r2 = r2_score(y_pred, y_test)
rmse = np.sqrt(mean_squared_error(y_pred, y_test))
rmsre = np.sqrt(mean_squared_relative_error(y_pred, y_test))
rrmse = np.sqrt(relative_mean_squared_error(y_pred, y_test))
mae = mean_absolute_error(y_pred, y_test)
rmae = (abs(y_pred - y_test) / y_test).mean()
print(f"R2 = {r2}")
print(f"RMSE = {rmse}, RMSRE = {rmsre}, RRMSE = {rrmse}")
print(f"MAE = {mae}, RMAE = {rmae}")
R2 = 0.9885588618118113 RMSE = 8.591745999158455, RMSRE = 0.14278126426108265, RRMSE = 0.017879925346330684 MAE = 6.547077597777151, RMAE = 0.07796670568225206
Plot residuals
sns.scatterplot(x=X_test.squeeze(), y=(y_pred - y_test).squeeze(), marker="o")
<AxesSubplot:>
Distribution of model parameters
az.plot_trace(trace)
plt.show()
Summary of model parameters
pm.summary(trace).round(3)
| mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| a | 61.410 | 1.579 | 58.408 | 64.377 | 0.037 | 0.026 | 1872.0 | 1947.0 | 1.0 |
| b | 0.053 | 0.002 | 0.050 | 0.056 | 0.000 | 0.000 | 1760.0 | 1901.0 | 1.0 |
| eps | 0.060 | 0.010 | 0.042 | 0.079 | 0.000 | 0.000 | 1369.0 | 706.0 | 1.0 |
# Update data reference
X = np.sort(np.concatenate([X_train, X_test]).squeeze()).reshape(-1, 1)
pm.set_data({"X": X.flatten()}, model=model)
# Generate posterior samples
post_pred = pm.sample_posterior_predictive(trace, model=model, samples=1000)
/home/vscode/.local/lib/python3.9/site-packages/pymc3/sampling.py:1689: UserWarning: samples parameter is smaller than nchains times ndraws, some draws and/or chains may not be represented in the returned posterior predictive sample warnings.warn(
Plot model posterior predictive and actual weekly rolling average for positive values of epidemiological days
mask = data['Epidemiological Days'] >= 0
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(X, post_pred["cases"].T, color="k", alpha=0.05)
ax.plot(X, data.loc[mask, "Weekly Rolling Average"], color="r", label="Actual")
ax.set(xlabel="Days Since 50 Cases", ylabel="Weekly Rolling Average", title="Weekly Rolling Average")
ax.legend()
<matplotlib.legend.Legend at 0x7f1153040850>
# Augment epidemioligical days
forecast_ndays = 7
X_forecast = np.append(X, list(np.arange(X.max() + 1, X.max() + forecast_ndays + 1))).reshape(-1, 1)
# Update data reference.
pm.set_data({"X": X_forecast.flatten()}, model=model)
# Generate posterior samples
post_pred_forecast = pm.sample_posterior_predictive(trace, model=model, samples=1000)
/home/vscode/.local/lib/python3.9/site-packages/pymc3/sampling.py:1689: UserWarning: samples parameter is smaller than nchains times ndraws, some draws and/or chains may not be represented in the returned posterior predictive sample warnings.warn(
Create data set containing forecast data
mask = data["Epidemiological Days"] >= 0
data_forecast = pd.DataFrame({"Epidemiological Days": X_forecast.flatten()
,"Weekly Rolling Average": data.loc[mask, "Weekly Rolling Average"].append(pd.Series([np.nan]*forecast_ndays))
,"Daily Number of Cases": data.loc[mask, "Daily Number of Cases"].append(pd.Series([np.nan]*forecast_ndays))
,"Predicted Weekly Rolling Average": np.mean(post_pred_forecast["cases"], axis=0).flatten()
,"notification_date": data.loc[mask, "notification_date"].append(pd.Series([data["notification_date"].max() + timedelta(days=day) for day in range(1, forecast_ndays+1)]))
,"Predicted Weekly Rolling Std": np.std(post_pred_forecast["cases"], axis=0).flatten()
})
data_forecast.reset_index(inplace=True, drop=True)
data_forecast["Prediction Lower Bound"] = data_forecast["Predicted Weekly Rolling Average"] - data_forecast["Predicted Weekly Rolling Std"]
data_forecast["Prediction Upper Bound"] = data_forecast["Predicted Weekly Rolling Average"] + data_forecast["Predicted Weekly Rolling Std"]
data_forecast["Forecast_On_Date"] = datetime.now().date()
data_forecast[["Predicted Weekly Rolling Average", "Prediction Lower Bound", "Prediction Upper Bound", "Forecast_On_Date", "notification_date"]].to_csv(str(PROJECT_ROOT / "data" / "processed" / f"predicted_weekly_rolling_average_{datetime.now().date()}.csv"), index=False)
data_forecast
| Epidemiological Days | Weekly Rolling Average | Daily Number of Cases | Predicted Weekly Rolling Average | notification_date | Predicted Weekly Rolling Std | Prediction Lower Bound | Prediction Upper Bound | Forecast_On_Date | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.0 | 45.0 | 103.0 | 61.500197 | 2021-07-10 | 4.174820 | 57.325377 | 65.675017 | 2021-08-10 |
| 1 | 1.0 | 57.0 | 114.0 | 64.714352 | 2021-07-11 | 4.442222 | 60.272130 | 69.156574 | 2021-08-10 |
| 2 | 2.0 | 65.0 | 72.0 | 68.362033 | 2021-07-12 | 4.410703 | 63.951329 | 72.772736 | 2021-08-10 |
| 3 | 3.0 | 72.0 | 88.0 | 71.682769 | 2021-07-13 | 4.710465 | 66.972304 | 76.393234 | 2021-08-10 |
| 4 | 4.0 | 78.0 | 75.0 | 75.677586 | 2021-07-14 | 4.739968 | 70.937619 | 80.417554 | 2021-08-10 |
| 5 | 5.0 | 86.0 | 100.0 | 79.825827 | 2021-07-15 | 5.094211 | 74.731617 | 84.920038 | 2021-08-10 |
| 6 | 6.0 | 94.0 | 108.0 | 83.842043 | 2021-07-16 | 5.352006 | 78.490036 | 89.194049 | 2021-08-10 |
| 7 | 7.0 | 93.0 | 95.0 | 88.499962 | 2021-07-17 | 5.665845 | 82.834117 | 94.165807 | 2021-08-10 |
| 8 | 8.0 | 93.0 | 111.0 | 93.260447 | 2021-07-18 | 6.255717 | 87.004730 | 99.516164 | 2021-08-10 |
| 9 | 9.0 | 92.0 | 65.0 | 98.190451 | 2021-07-19 | 6.361500 | 91.828952 | 104.551951 | 2021-08-10 |
| 10 | 10.0 | 94.0 | 106.0 | 102.918440 | 2021-07-20 | 6.281123 | 96.637317 | 109.199563 | 2021-08-10 |
| 11 | 11.0 | 104.0 | 142.0 | 108.525912 | 2021-07-21 | 6.532090 | 101.993822 | 115.058002 | 2021-08-10 |
| 12 | 12.0 | 110.0 | 141.0 | 114.223185 | 2021-07-22 | 7.264580 | 106.958605 | 121.487764 | 2021-08-10 |
| 13 | 13.0 | 116.0 | 151.0 | 120.683579 | 2021-07-23 | 7.285887 | 113.397692 | 127.969466 | 2021-08-10 |
| 14 | 14.0 | 123.0 | 142.0 | 126.918794 | 2021-07-24 | 8.187476 | 118.731318 | 135.106270 | 2021-08-10 |
| 15 | 15.0 | 131.0 | 168.0 | 133.351110 | 2021-07-25 | 8.251517 | 125.099592 | 141.602627 | 2021-08-10 |
| 16 | 16.0 | 141.0 | 140.0 | 141.120748 | 2021-07-26 | 8.515365 | 132.605383 | 149.636113 | 2021-08-10 |
| 17 | 17.0 | 149.0 | 162.0 | 148.099689 | 2021-07-27 | 9.420464 | 138.679225 | 157.520153 | 2021-08-10 |
| 18 | 18.0 | 166.0 | 257.0 | 156.145269 | 2021-07-28 | 9.761741 | 146.383528 | 165.907009 | 2021-08-10 |
| 19 | 19.0 | 172.0 | 181.0 | 163.988994 | 2021-07-29 | 10.187280 | 153.801714 | 174.176273 | 2021-08-10 |
| 20 | 20.0 | 178.0 | 198.0 | 173.043933 | 2021-07-30 | 11.167784 | 161.876149 | 184.211718 | 2021-08-10 |
| 21 | 21.0 | 193.0 | 244.0 | 182.205208 | 2021-07-31 | 11.573017 | 170.632190 | 193.778225 | 2021-08-10 |
| 22 | 22.0 | 196.0 | 192.0 | 192.311111 | 2021-08-01 | 12.357901 | 179.953210 | 204.669013 | 2021-08-10 |
| 23 | 23.0 | 206.0 | 208.0 | 202.511934 | 2021-08-02 | 12.534993 | 189.976941 | 215.046927 | 2021-08-10 |
| 24 | 24.0 | 217.0 | 239.0 | 213.008802 | 2021-08-03 | 13.616731 | 199.392071 | 226.625532 | 2021-08-10 |
| 25 | 25.0 | 215.0 | 244.0 | 224.406724 | 2021-08-04 | 14.909242 | 209.497483 | 239.315966 | 2021-08-10 |
| 26 | 26.0 | 236.0 | 329.0 | 236.674463 | 2021-08-05 | 14.737020 | 221.937443 | 251.411483 | 2021-08-10 |
| 27 | 27.0 | 251.0 | 301.0 | 249.426510 | 2021-08-06 | 16.134741 | 233.291770 | 265.561251 | 2021-08-10 |
| 28 | 28.0 | 254.0 | 263.0 | 260.952917 | 2021-08-07 | 17.060307 | 243.892610 | 278.013224 | 2021-08-10 |
| 29 | 29.0 | 264.0 | 265.0 | 276.247895 | 2021-08-08 | 17.831729 | 258.416166 | 294.079624 | 2021-08-10 |
| 30 | 30.0 | 277.0 | 296.0 | 290.898249 | 2021-08-09 | 20.554062 | 270.344187 | 311.452311 | 2021-08-10 |
| 31 | 31.0 | NaN | NaN | 306.262415 | 2021-08-10 | 21.294525 | 284.967890 | 327.556940 | 2021-08-10 |
| 32 | 32.0 | NaN | NaN | 321.691924 | 2021-08-11 | 22.504959 | 299.186965 | 344.196884 | 2021-08-10 |
| 33 | 33.0 | NaN | NaN | 340.607535 | 2021-08-12 | 23.837462 | 316.770073 | 364.444997 | 2021-08-10 |
| 34 | 34.0 | NaN | NaN | 356.885546 | 2021-08-13 | 24.269904 | 332.615641 | 381.155450 | 2021-08-10 |
| 35 | 35.0 | NaN | NaN | 377.651410 | 2021-08-14 | 26.249692 | 351.401717 | 403.901102 | 2021-08-10 |
| 36 | 36.0 | NaN | NaN | 395.888649 | 2021-08-15 | 28.102348 | 367.786301 | 423.990997 | 2021-08-10 |
| 37 | 37.0 | NaN | NaN | 417.652973 | 2021-08-16 | 29.027925 | 388.625049 | 446.680898 | 2021-08-10 |
Plot actual and forecasted values
forecast_std = np.std(post_pred_forecast["cases"], axis=0).flatten()
fig, ax = plt.subplots(figsize=(20, 10))
ax = sns.lineplot(x=data_forecast["Epidemiological Days"], y=np.mean(post_pred_forecast["cases"], axis=0).flatten(), color="b", marker="o", label="Predicted Rolling Average", linestyle="--", ax=ax)
ax = sns.lineplot(x=data_forecast["Epidemiological Days"], y=np.mean(post_pred_forecast["cases"], axis=0).flatten() + forecast_std.flatten(), color="b", linestyle=":", label="Prediction Uncertainty", ax=ax)
ax = sns.lineplot(x=data_forecast["Epidemiological Days"], y=np.mean(post_pred_forecast["cases"], axis=0).flatten() - forecast_std.flatten(), color="b", linestyle=":", ax=ax)
ax = sns.lineplot(x=data_forecast["Epidemiological Days"], y=data_forecast["Weekly Rolling Average"], color="r", label="Actual Rolling Average", marker="o", ax=ax)
ax = sns.lineplot(x=data_forecast["Epidemiological Days"], y=data_forecast["Daily Number of Cases"], color="k", label="Daily Number of Cases", marker="o", ax=ax)
plt.xticks(data_forecast["Epidemiological Days"], data_forecast["notification_date"].astype(str), rotation = 30, ha="right")
ax.set(xlabel="Day", ylabel="Number of Cases", title="Number of Cases Since 50 Cases")
ax.legend()
ax.grid(True)
%load_ext watermark
%watermark -n -u -v -iv -w
Last updated: Tue Aug 10 2021 Python implementation: CPython Python version : 3.9.2 IPython version : 7.25.0 arviz : 0.11.2 seaborn : 0.11.1 pymc3 : 3.11.2 numpy : 1.19.5 matplotlib: 3.4.2 pandas : 1.3.0 sys : 3.9.2 (default, Mar 12 2021, 19:04:51) [GCC 8.3.0] Watermark: 2.2.0